Here I load the precursor alignments data from all samples. Bed files are intersected with piRNA annotations, and the mapping locations of the reads and 21U-RNA sites are imported - these information is used to select for precursors as reads initiating at the -2 position (these are clearly enriched, see initiation landscapes Rmd).

I then plot the length distributions of these precursors, analyzing the changes in length that occur upon RNAi knockdown of ints-11, mutation of the general elongation factor TFIIS, or both.

library(ggplot2)

setwd("/Users/ab6415/Bioinformatics/Analysis/INTEGRATOR_PAPER_ANALYSES/Fig1_Fig3_precursors_analysis/length/")

piRNA_counts_wholeanimal_E1 <- list.files(path="../counts/wholeworm/",
    pattern="*pis"); piRNA_counts_wholeanimal_E1<-paste0("../counts/wholeworm/",piRNA_counts_wholeanimal_E1,sep="")
piRNA_counts_wholeanimal_E1_longreads <- list.files(path="../counts/wholeworm_longreads/",
    pattern="*pis"); piRNA_counts_wholeanimal_E1_longreads<-paste0("../counts/wholeworm_longreads/",piRNA_counts_wholeanimal_E1_longreads,sep="")
piRNA_counts_germnuclei_total <- list.files(path="../counts/germnuclei_total/",
    pattern="*pis"); piRNA_counts_germnuclei_total<-paste0("../counts/germnuclei_total/",piRNA_counts_germnuclei_total,sep="")
piRNA_counts_germnuclei_fractionation <- list.files(path="../counts/germnuclei_fractionation_rep1/",
    pattern="*pis"); piRNA_counts_germnuclei_fractionation<-paste0("../counts/germnuclei_fractionation_rep1/",piRNA_counts_germnuclei_fractionation,sep="")
piRNA_counts_germnuclei_fractionation_rep2 <- list.files(path="../counts/germnuclei_fractionation_rep2/",
    pattern="*pis"); piRNA_counts_germnuclei_fractionation_rep2<-paste0("../counts/germnuclei_fractionation_rep2/",piRNA_counts_germnuclei_fractionation_rep2,sep="")


piRNA_counts_filenames<-c(piRNA_counts_wholeanimal_E1,piRNA_counts_wholeanimal_E1_longreads,piRNA_counts_germnuclei_total,piRNA_counts_germnuclei_fractionation,piRNA_counts_germnuclei_fractionation_rep2)
piRNA_counts_filenames<-piRNA_counts_filenames[c(1:13,19:21,14:18,27:30,22,23,25,26)]

names <-c("wholeworm_EV_col1","wholeworm_ints11_col1","wholeworm_EV_col2","wholeworm_ints11_col2","wholeworm_EV_96h","wholeworm_ints11_96h","wholeworm_EV_col1_longreads","wholeworm_ints11_col1_longreads","wholeworm_ints11_col2_longreads","germnuclei_total_N2_EV","germnuclei_total_N2_ints11","germnuclei_total_TFIIS_EV","germnuclei_total_TFIIS_ints11","germnuclei_NP_N2_EV","germnuclei_NP_N2_ints11","germnuclei_NP_TFIIS_EV","germnuclei_NP_TFIIS_ints11","germnuclei_CHR_N2_EV","germnuclei_CHR_N2_ints11","germnuclei_CHR_TFIIS_EV","germnuclei_CHR_TFIIS_ints11","germnuclei_rep2_NP_N2_EV","germnuclei_rep2_NP_N2_ints11","germnuclei_rep2_NP_TFIIS_EV","germnuclei_rep2_NP_TFIIS_ints11","germnuclei_rep2_CHR_N2_EV","germnuclei_rep2_CHR_N2_ints11","germnuclei_rep2_CHR_TFIIS_EV","germnuclei_rep2_CHR_TFIIS_ints11")

piRNA_counts_filenames
##  [1] "../counts/wholeworm/Sample_1_EV_col1.fastq.trimmed.sorted.bed.pis"                                          
##  [2] "../counts/wholeworm/Sample_3_ints_11_col1.fastq.trimmed.sorted.bed.pis"                                     
##  [3] "../counts/wholeworm/Sample_4_EV_col2.fastq.trimmed.sorted.bed.pis"                                          
##  [4] "../counts/wholeworm/Sample_6_ints_11_col2.fastq.trimmed.sorted.bed.pis"                                     
##  [5] "../counts/wholeworm/Sample_7_EV_col2_96h.fastq.trimmed.sorted.bed.pis"                                      
##  [6] "../counts/wholeworm/Sample_9_ints_11_col2_96h.fastq.trimmed.sorted.bed.pis"                                 
##  [7] "../counts/wholeworm_longreads/1_EV_col1_S1_R1_001.fastq.trimmed.sorted.bed.pis"                             
##  [8] "../counts/wholeworm_longreads/3_ints-11_col1_S2_R1_001.fastq.trimmed.sorted.bed.pis"                        
##  [9] "../counts/wholeworm_longreads/9_ints-11_col2_96h_S3_R1_001.fastq.trimmed.sorted.bed.pis"                    
## [10] "../counts/germnuclei_total/Sample_13_N2_EV_CR_S63_L007_R1_001.fastq.trimmed.sorted.bed.pis"                 
## [11] "../counts/germnuclei_total/Sample_14_N2_Ints11_CR_S64_L007_R1_001.fastq.trimmed.sorted.bed.pis"             
## [12] "../counts/germnuclei_total/Sample_15_TFIIS_EV_CR_S65_L007_R1_001.fastq.trimmed.sorted.bed.pis"              
## [13] "../counts/germnuclei_total/Sample_16_TFIIS_Ints11_CR_S66_L007_R1_001.fastq.trimmed.sorted.bed.pis"          
## [14] "../counts/germnuclei_fractionation_rep1/7_N2_EV_np_S7_R1_001.fastq.trimmed.sorted.bed.pis"                  
## [15] "../counts/germnuclei_fractionation_rep1/8_N2_ints-11RNAi_np_S8_R1_001.fastq.trimmed.sorted.bed.pis"         
## [16] "../counts/germnuclei_fractionation_rep1/9_TFIIS_EV_np_S9_R1_001.fastq.trimmed.sorted.bed.pis"               
## [17] "../counts/germnuclei_fractionation_rep1/10_TFIIS_ints-11RNAi_np_S10_R1_001.fastq.trimmed.sorted.bed.pis"    
## [18] "../counts/germnuclei_fractionation_rep1/25_N2_EV_chr_S11_R1_001.fastq.trimmed.sorted.bed.pis"               
## [19] "../counts/germnuclei_fractionation_rep1/26_N2_ints-11RNAi_chr_S12_R1_001.fastq.trimmed.sorted.bed.pis"      
## [20] "../counts/germnuclei_fractionation_rep1/27_TFIIS_EV_chr_S13_R1_001.fastq.trimmed.sorted.bed.pis"            
## [21] "../counts/germnuclei_fractionation_rep1/28_TFIIS_ints-11RNAi_chr_S14_R1_001.fastq.trimmed.sorted.bed.pis"   
## [22] "../counts/germnuclei_fractionation_rep2/5_N2_EV_np_S5_R1_001.fastq.trimmed.sorted.bed.pis"                  
## [23] "../counts/germnuclei_fractionation_rep2/6_N2_ints11RNAi_np_S6_R1_001.fastq.trimmed.sorted.bed.pis"          
## [24] "../counts/germnuclei_fractionation_rep2/7_TFIIS_EV_np_S7_R1_001.fastq.trimmed.sorted.bed.pis"               
## [25] "../counts/germnuclei_fractionation_rep2/8_TFIIS_ints11RNAi_np_S8_R1_001.fastq.trimmed.sorted.bed.pis"       
## [26] "../counts/germnuclei_fractionation_rep2/1_N2_EV_chr_S1_R1_001.fastq.trimmed.sorted.bed.pis"                 
## [27] "../counts/germnuclei_fractionation_rep2/13_N2_ints11RNAi_chr_techrep_S9_R1_001.fastq.trimmed.sorted.bed.pis"
## [28] "../counts/germnuclei_fractionation_rep2/3_TFIIS_EV_chr_S3_R1_001.fastq.trimmed.sorted.bed.pis"              
## [29] "../counts/germnuclei_fractionation_rep2/4_TFIIS_ints11RNAi_chr_S4_R1_001.fastq.trimmed.sorted.bed.pis"
names         
##  [1] "wholeworm_EV_col1"                "wholeworm_ints11_col1"           
##  [3] "wholeworm_EV_col2"                "wholeworm_ints11_col2"           
##  [5] "wholeworm_EV_96h"                 "wholeworm_ints11_96h"            
##  [7] "wholeworm_EV_col1_longreads"      "wholeworm_ints11_col1_longreads" 
##  [9] "wholeworm_ints11_col2_longreads"  "germnuclei_total_N2_EV"          
## [11] "germnuclei_total_N2_ints11"       "germnuclei_total_TFIIS_EV"       
## [13] "germnuclei_total_TFIIS_ints11"    "germnuclei_NP_N2_EV"             
## [15] "germnuclei_NP_N2_ints11"          "germnuclei_NP_TFIIS_EV"          
## [17] "germnuclei_NP_TFIIS_ints11"       "germnuclei_CHR_N2_EV"            
## [19] "germnuclei_CHR_N2_ints11"         "germnuclei_CHR_TFIIS_EV"         
## [21] "germnuclei_CHR_TFIIS_ints11"      "germnuclei_rep2_NP_N2_EV"        
## [23] "germnuclei_rep2_NP_N2_ints11"     "germnuclei_rep2_NP_TFIIS_EV"     
## [25] "germnuclei_rep2_NP_TFIIS_ints11"  "germnuclei_rep2_CHR_N2_EV"       
## [27] "germnuclei_rep2_CHR_N2_ints11"    "germnuclei_rep2_CHR_TFIIS_EV"    
## [29] "germnuclei_rep2_CHR_TFIIS_ints11"
piRNA_counts <- lapply(piRNA_counts_filenames,function(i){
  read.csv(paste(i,sep=""), header=FALSE,sep="\t")
})


piRNA_counts_precfiltered <- lapply(piRNA_counts, function(df){
 df$plus_d <- df$V10-df$V2
 df$minus_d <- df$V3-df$V11
 df_plus<-df[which(df$plus_d==2 & df$V8=="+"),]
 df_minus<-df[which(df$minus_d==(2) & df$V8=="-"),]
 return(rbind(df_plus,df_minus))
})

motifdep_piRNA_precursors<-lapply(piRNA_counts_precfiltered,function(df){return(df[c(grep("^21UR-",df$V12),grep("^piRNA_type_1",df$V12)),])})
motifind_piRNA_precursors<-lapply(piRNA_counts_precfiltered,function(df){return(df[grep("^piRNA_type_2",df$V12),])})

for (i in c(14:29)){
  print(names[i])
  write.table(motifdep_piRNA_precursors[[i]],file=paste(paste("/Users/ab6415/Bioinformatics/Analysis/INTEGRATOR_PAPER_ANALYSES/Fig1_Fig3_precursors_analysis/length/precursor_tables/",names[i],sep = ""),".txt",sep=""),quote = FALSE,sep = "\t")
}
## [1] "germnuclei_NP_N2_EV"
## [1] "germnuclei_NP_N2_ints11"
## [1] "germnuclei_NP_TFIIS_EV"
## [1] "germnuclei_NP_TFIIS_ints11"
## [1] "germnuclei_CHR_N2_EV"
## [1] "germnuclei_CHR_N2_ints11"
## [1] "germnuclei_CHR_TFIIS_EV"
## [1] "germnuclei_CHR_TFIIS_ints11"
## [1] "germnuclei_rep2_NP_N2_EV"
## [1] "germnuclei_rep2_NP_N2_ints11"
## [1] "germnuclei_rep2_NP_TFIIS_EV"
## [1] "germnuclei_rep2_NP_TFIIS_ints11"
## [1] "germnuclei_rep2_CHR_N2_EV"
## [1] "germnuclei_rep2_CHR_N2_ints11"
## [1] "germnuclei_rep2_CHR_TFIIS_EV"
## [1] "germnuclei_rep2_CHR_TFIIS_ints11"

Precursor length distributions

library(reshape2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
density_plot_for_multiple_samples<-function(dflist,indices,title){
  length.mlt <- melt(lapply(dflist,function(df){return(df$V4)}))
  length.mlt$L1 <- as.numeric(length.mlt$L1)
  length.mlt$sample <- names[length.mlt$L1]
  
  length.mlt <- length.mlt[which(length.mlt$L1 %in% indices),]
  length.mlt$L1 <- as.factor(length.mlt$L1)
  
  print(ggplot(length.mlt)+geom_density(aes(x=value,group=sample,col=sample,fill=sample),alpha=0.1)+theme_classic()+
    ggtitle(title)+xlab("precursor length"))
}


histogram_for_multiple_samples<-function(dflist,indices,title,filename){
   
  length.mlt <- melt(lapply(dflist,function(df){return(df$V4)}))
  length.mlt$L1 <- as.numeric(length.mlt$L1)
  length.mlt$sample <- names[length.mlt$L1]
  
  length.mlt <- length.mlt[which(length.mlt$L1 %in% indices),]
  length.mlt$L1 <- as.factor(length.mlt$L1)
  
  kernel_1<-density(length.mlt[which(length.mlt$L1 == levels((length.mlt$L1))[1]),"value"])
  kernel_2<-density(length.mlt[which(length.mlt$L1 == levels((length.mlt$L1))[2]),"value"])
  
  max_1<-kernel_1$x[which(diff(sign(diff(kernel_1$y)))==-2)+1]
  max_2<-kernel_2$x[which(diff(sign(diff(kernel_2$y)))==-2)+1]
  
  #print(paste(paste(names[indices[1]],"max:",sep=" "),max_1,sep=" "))
  #print(paste(paste(names[indices[2]],"max:",sep=" "),max_2,sep=" "))
  
  linedata<-data.frame(xvar1=kernel_1$x,xvar2=kernel_2$x,yvar1=kernel_1$y,yvar2=kernel_2$y)
  
  print(ggplot(length.mlt, aes(value)) + 
    geom_histogram(aes(y = ..density..,fill=sample), position = 'identity',breaks=seq(18,75),alpha=0.5)+
    theme_classic()+
    scale_fill_manual(values = c("orange","lightblue"))+
    ggtitle(title)+xlab("precursor length")+ylab("fraction of sequences")+
    coord_cartesian(x=c(18,75))+
    geom_line(data=linedata,aes(x=linedata$xvar1,y=linedata$yvar1,col=names[indices[1]]))+
    geom_line(data=linedata,aes(x=linedata$xvar2,y=linedata$yvar2,col=names[indices[2]]))+
    scale_color_manual(values = c("orange","lightblue"))+
    guides(col=FALSE))
  ggsave(filename)
}


histogram_for_multiple_samples_totalreads<-function(dflist,indices,title,filename){
  length.mlt<-data.frame()
  for (i in seq(length(dflist))){
    df<-dflist[[i]]
    df$L1<-rep(i,nrow(df))
    length.mlt<-rbind(length.mlt,df[,c("L1","V4","V6")])
  }
  
  colnames(length.mlt)[2]<-"value"
  length.mlt$L1 <- as.numeric(length.mlt$L1)
  length.mlt$sample <- names[length.mlt$L1]
  
  length.mlt <- length.mlt[which(length.mlt$L1 %in% indices),]
  length.mlt$L1 <- as.factor(length.mlt$L1)
  
  totalreads_a<-c()
  data_a<-length.mlt[which(length.mlt$L1 == levels((length.mlt$L1))[1]),c("value","V6")]
  for (i in seq(nrow(data_a))){
    totalreads_a<-c(totalreads_a,rep(data_a[i,"value"],data_a[i,"V6"]))
  }
  totalreads_b<-c()
  data_b<-length.mlt[which(length.mlt$L1 == levels((length.mlt$L1))[2]),c("value","V6")]
  for (i in seq(nrow(data_b))){
    totalreads_b<-c(totalreads_b,rep(data_b[i,"value"],data_b[i,"V6"]))
  }
  
  kernel_1<-density(totalreads_a,bw=1)
  max_1<-kernel_1$x[which(diff(sign(diff(kernel_1$y)))==-2)+1]
  
  kernel_2<-density(totalreads_b,bw=1)
  max_2<-kernel_2$x[which(diff(sign(diff(kernel_2$y)))==-2)+1]
  
  #print(paste(paste(names[indices[1]],"max:",sep=" "),max_1,sep=" "))
  #print(paste(paste(names[indices[2]],"max:",sep=" "),max_2,sep=" "))
  
  linedata<-data.frame(xvar1=kernel_1$x,xvar2=kernel_2$x,yvar1=kernel_1$y,yvar2=kernel_2$y)

  print(ggplot(length.mlt, aes(value)) + 
    geom_histogram(aes(y = ..density..,weight=V6,fill=factor(sample)), position = 'identity',breaks=seq(18,75),alpha=0.5)+
    theme_classic()+
    scale_fill_manual(values = c("orange","lightblue"))+
    ggtitle(title)+xlab("precursor length")+ylab("fraction of sequences")+
    coord_cartesian(x=c(18,75))+
    geom_line(data=linedata,aes(x=linedata$xvar1,y=linedata$yvar1,col=names[indices[1]]))+
    geom_line(data=linedata,aes(x=linedata$xvar2,y=linedata$yvar2,col=names[indices[2]]))+
    scale_color_manual(values=c("orange","lightblue"))+
    guides(col=FALSE))
  
  ggsave(filename)
}


#plotting pairs of samples

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(7,8),"whole animal","whole_animal_N2_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples_totalreads(motifdep_piRNA_precursors,c(7,8),"whole animal","whole_animal_N2_ints11_fracreads.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(14,15),"NP","NP_rep1_N2_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(14,16),"NP","NP_rep1_N2_tfiis_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(14,17),"NP","NP_rep1_N2_tfiis_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(18,19),"CHR","CHR_rep1_N2_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(18,20),"CHR","CHR_rep1_N2_tfiis_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(18,21),"CHR","CHR_rep1_N2_tfiis_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(22,23),"NP","NP_rep2_N2_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(22,24),"NP","NP_rep2_N2_tfiis_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(22,25),"NP","NP_rep2_N2_tfiis_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(26,27),"CHR","CHR_rep2_N2_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(26,28),"CHR","CHR_rep2_N2_ints11_techrep_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples(motifdep_piRNA_precursors,c(26,29),"CHR","CHR_rep2_N2_tfiis_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

#plotting multiple samples together

boxplot_for_multiple_samples<-function(dflist,indices,title,filename){
   
  length.mlt <- melt(lapply(dflist,function(df){return(df$V4)}))
  length.mlt$L1 <- as.numeric(length.mlt$L1)
  length.mlt$sample <- names[length.mlt$L1]
  
  length.mlt <- length.mlt[which(length.mlt$L1 %in% indices),]
  length.mlt$L1 <- factor(length.mlt$L1,levels=indices)
  
  print(ggplot(length.mlt) + 
    geom_boxplot(aes(y=value,x=L1), position = 'identity',binwidth=1,alpha=0.5)+
    theme_classic()+
    scale_fill_manual(values = c("orange","lightblue"))+
    ggtitle(title)+ylab("precursor length")+
    scale_x_discrete(labels=names[indices])+
    theme(axis.text.x= element_text(angle=45,vjust=1,hjust=1)))
  ggsave(filename)
}

violin_for_multiple_samples<-function(dflist,indices,title,filename){
   
  length.mlt <- melt(lapply(dflist,function(df){return(df$V4)}))
  length.mlt$L1 <- as.numeric(length.mlt$L1)
  length.mlt$sample <- names[length.mlt$L1]
  
  length.mlt <- length.mlt[which(length.mlt$L1 %in% indices),]
  length.mlt$L1 <- factor(length.mlt$L1,levels=indices)
  
  print(ggplot(length.mlt) + 
    geom_violin(aes(y=value,x=L1,fill=L1), position = 'identity',bw=3,alpha=0.5)+
    theme_classic()+
    ggtitle(title)+ylab("precursor length")+
    scale_x_discrete(labels=names[indices])+
    theme(axis.text.x= element_text(angle=45,vjust=1,hjust=1)))
}


boxplot_for_multiple_samples(motifdep_piRNA_precursors,c(1:6),title="whole animal","precursor_length_increase_ints11RNAi.pdf")
## Warning: Ignoring unknown parameters: binwidth
## Saving 7 x 5 in image

violin_for_multiple_samples(motifdep_piRNA_precursors,c(1:6),title="whole animal","precursor_length_increase_ints11RNAi_violin.pdf")

boxplot_for_multiple_samples(motifdep_piRNA_precursors,c(14:29),title="fractionation experiment","precursor_length_fractionationexp.pdf")
## Warning: Ignoring unknown parameters: binwidth
## Saving 7 x 5 in image

violin_for_multiple_samples(motifdep_piRNA_precursors,c(14:29),title="fractionation experiment","precursor_length_fractionationexp_violin.pdf")

Here I combined the data from the two replicates in two ways: 1. computing the distributions separately and averaging the density at each length (has the disadvantage of weighting equally samples with lots of precursors and samples with lower number of precursors which are more noisy) - “averaged” 2. adding the distributions (gives more weight to samples with more depth) - “combined”

Averaged distributions:

histogram_for_multiple_samples_averagedist<-function(dflist,indices,title,filename){
   
  length.mlt <- melt(lapply(dflist,function(df){return(df$V4)}))
  length.mlt$L1 <- as.numeric(length.mlt$L1)
  length.mlt$sample <- names[length.mlt$L1]
  
  hist_cond1 <- hist(length.mlt[which(length.mlt$L1 %in% indices[1]),"value"],breaks=seq(17,76),plot = FALSE)
  hist_cond2 <- hist(length.mlt[which(length.mlt$L1 %in% indices[2]),"value"],breaks=seq(17,76),plot = FALSE)
  hist_cond3<- hist(length.mlt[which(length.mlt$L1 %in% indices[3]),"value"],breaks=seq(17,76),plot = FALSE)
  hist_cond4 <- hist(length.mlt[which(length.mlt$L1 %in% indices[4]),"value"],breaks=seq(17,76),plot = FALSE)

  avg_density_cond12<-0.5*hist_cond1$density+0.5*hist_cond2$density
  avg_density_cond34<-0.5*hist_cond3$density+0.5*hist_cond4$density
  
  length_distributions<-data.frame(
    density=c(avg_density_cond12,avg_density_cond34),
    cond=c(rep(names[indices[1]],length(avg_density_cond12)),rep(names[indices[3]],length(avg_density_cond34))),
    length=c(18:76,18:76))
  
  print(ggplot(length_distributions, aes(y=density, x=length,fill=cond)) + 
    geom_col(position = 'identity',alpha=0.5)+
    theme_classic()+
    scale_fill_manual(values = c("orange","lightblue"))+
    ggtitle(title)+xlab("precursor length")+ylab("fraction of sequences")+
    coord_cartesian(x=c(18,75)))
  ggsave(filename)
}


histogram_for_multiple_samples_averagedist(motifdep_piRNA_precursors,c(14,22,15,23),"NP","averaged_NP_N2_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples_averagedist(motifdep_piRNA_precursors,c(14,22,16,24),"NP","averaged_NP_N2_tfiis_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples_averagedist(motifdep_piRNA_precursors,c(14,22,17,25),"NP","averaged_NP_N2_tfiis_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples_averagedist(motifdep_piRNA_precursors,c(18,26,19,27),"CHR","averaged_CHR_N2_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples_averagedist(motifdep_piRNA_precursors,c(18,26,20,28),"CHR","averaged_CHR_N2_tfiis_fracseqs.pdf")
## Saving 7 x 5 in image

histogram_for_multiple_samples_averagedist(motifdep_piRNA_precursors,c(18,26,21,29),"CHR","averaged_CHR_N2_tfiis_ints11_fracseqs.pdf")
## Saving 7 x 5 in image

Combined distributions

histogram_for_multiple_samples_combineddist_densityfit<-function(dflist,indices,title,filename){
  
  length.mlt <- melt(lapply(dflist,function(df){return(df$V4)}))
  length.mlt$L1 <- as.numeric(length.mlt$L1)
  length.mlt <- length.mlt[which(length.mlt$L1 %in% indices),]

  length.mlt[which(length.mlt$L1==indices[2]),"L1"]<-indices[1]
  length.mlt[which(length.mlt$L1==indices[4]),"L1"]<-indices[3]
  
  length.mlt$sample <- names[length.mlt$L1]
  length.mlt$L1 <- as.factor(length.mlt$L1)

  a<-hist(length.mlt[which(length.mlt$L1 == levels((length.mlt$L1))[1]),"value"],plot = FALSE,breaks = seq(130))
  b<-hist(length.mlt[which(length.mlt$L1 == levels((length.mlt$L1))[2]),"value"],plot = FALSE,breaks = seq(130))
  
  kernel_1<-density(length.mlt[which(length.mlt$L1 == levels((length.mlt$L1))[1]),"value"])
  max_1<-kernel_1$x[which(diff(sign(diff(kernel_1$y)))==-2)+1]
  
  kernel_2<-density(length.mlt[which(length.mlt$L1 == levels((length.mlt$L1))[2]),"value"])
  max_2<-kernel_2$x[which(diff(sign(diff(kernel_2$y)))==-2)+1]
  
  print(max_1)
  print(max_2)
  
  linedata<-data.frame(xvar1=kernel_1$x,xvar2=kernel_2$x,yvar1=kernel_1$y,yvar2=kernel_2$y)
  
  print(ggplot(length.mlt, aes(value)) + 
    geom_histogram(aes(y = ..density..,fill=sample), position = 'identity',breaks=a$breaks,alpha=0.5)+
    theme_classic()+
    scale_fill_manual(values = c("orange","lightblue"))+
    ggtitle(title)+xlab("precursor length")+ylab("fraction of sequences")+
    coord_cartesian(x=c(18,75))+
    geom_line(data=linedata,aes(x=linedata$xvar1,y=linedata$yvar1,col=names[indices[1]]))+
    geom_line(data=linedata,aes(x=linedata$xvar2,y=linedata$yvar2,col=names[indices[3]]))+
    scale_color_manual(values=c("orange","lightblue"))+
    guides(col=FALSE))
  ggsave(filename)

}


histogram_for_multiple_samples_combineddist_densityfit(motifdep_piRNA_precursors,c(14,22,18,26),"NP_vs_CHR","combined_NP_vs_CHR_N2EV_fracseqs.pdf")
## [1] 28.26018 42.07639 48.74210 56.13499 59.89203 67.76969 71.04195 75.04138
## [1] 26.86854 48.21537 74.64477
## Saving 7 x 5 in image

histogram_for_multiple_samples_combineddist_densityfit(motifdep_piRNA_precursors,c(14,22,15,23),"NP","combined_NP_N2_ints11_fracseqs.pdf")
## [1] 28.26018 42.07639 48.74210 56.13499 59.89203 67.76969 71.04195 75.04138
## [1] 28.27265 46.43604 58.07596
## Saving 7 x 5 in image

histogram_for_multiple_samples_combineddist_densityfit(motifdep_piRNA_precursors,c(14,22,16,24),"NP","combined_NP_N2_tfiis_fracseqs.pdf")
## [1] 28.26018 42.07639 48.74210 56.13499 59.89203 67.76969 71.04195 75.04138
## [1] 19.60745 28.96681 50.93009 66.90339 70.27276 74.89004
## Saving 7 x 5 in image

histogram_for_multiple_samples_combineddist_densityfit(motifdep_piRNA_precursors,c(14,22,17,25),"NP","combined_NP_N2_tfiis_ints11_fracseqs.pdf")
## [1] 28.26018 42.07639 48.74210 56.13499 59.89203 67.76969 71.04195 75.04138
## [1] 29.24382 46.98125
## Saving 7 x 5 in image

histogram_for_multiple_samples_combineddist_densityfit(motifdep_piRNA_precursors,c(18,26,19,27),"CHR","combined_CHR_N2_ints11_fracseqs.pdf")
## [1] 26.86854 48.21537 74.64477
## [1] 27.35717 47.10878
## Saving 7 x 5 in image

histogram_for_multiple_samples_combineddist_densityfit(motifdep_piRNA_precursors,c(18,26,20,28),"CHR","combined_CHR_N2_tfiis_fracseqs.pdf")
## [1] 26.86854 48.21537 74.64477
## [1] 20.15052 28.99874 51.11929
## Saving 7 x 5 in image

histogram_for_multiple_samples_combineddist_densityfit(motifdep_piRNA_precursors,c(18,26,21,29),"CHR","combined_CHR_N2_tfiis_ints11_fracseqs.pdf")
## [1] 26.86854 48.21537 74.64477
## [1] 29.46653 51.49375
## Saving 7 x 5 in image

Here I bootstrap the combined distributions of chromatin bound precursors in order to estimate how reliable the peak position is. I do 2000 bootstrap repeats, sampling 3000 total precursor sequences, without replacement, and with a sampling probability proportional to the abundance of each sequence in the set.

#setting up the function to find maxima
getmaxima<-function(df){
df<-df[-which(df$V4<23),]
hist(df$V4,breaks = seq(0,75),freq = FALSE)
lines(density(df$V4),col="green")
kernel<-density(df$V4)
maxima<-kernel$x[which(diff(sign(diff(kernel$y)))==-2)+1]
return(maxima)
}
maxima<-lapply(motifdep_piRNA_precursors[c(18:21,26:29)], FUN = getmaxima)

getmaxima_bootstrap<-function(df,nboots,nprecs){

df<-df[-which(df$V4<23),]
maxs_peak1<-c()
maxs_peak2<-c()

for (i in seq(nboots)){

df_sample<-sample(df$V4,size = nprecs,prob = df$V6)
if (i<11){ 
hist(df_sample,breaks = seq(0,75),freq = FALSE,plot=FALSE)
lines(density(df_sample),col="green")
}
kernel<-density(df_sample)
maxima<-kernel$x[which(diff(sign(diff(kernel$y)))==-2)+1]

maxs_peak1<-c(maxs_peak1,maxima[1])
maxs_peak2<-c(maxs_peak2,maxima[2])
}
return(data.frame(peak1=maxs_peak1,peak2=maxs_peak2))
}


chbound_precursors<-motifdep_piRNA_precursors[c(18:21,26:29)]
chbound_precursors_repscombined<-list(rbind(chbound_precursors[[1]],chbound_precursors[[5]]),rbind(chbound_precursors[[2]],chbound_precursors[[6]]),rbind(chbound_precursors[[3]],chbound_precursors[[7]]),rbind(chbound_precursors[[4]],chbound_precursors[[8]]))


for (i in seq(4)){print(nrow(chbound_precursors_repscombined[[i]]))}
## [1] 18228
## [1] 4897
## [1] 13587
## [1] 7777
bootstrapped_peaks<-lapply(chbound_precursors_repscombined,FUN=getmaxima_bootstrap,nboots=2000,nprecs=3000)
## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

## Warning in hist.default(df_sample, breaks = seq(0, 75), freq = FALSE, plot =
## FALSE): argument 'freq' is not made use of

peak1_data<-length.mlt <- melt(lapply(bootstrapped_peaks,function(df){return(df$peak1)}))
peak2_data<-length.mlt <- melt(lapply(bootstrapped_peaks,function(df){return(df$peak2)}))

ggplot(peak1_data) + 
    geom_boxplot(aes(y = value,x=as.factor(L1)),outlier.shape = NA)+
    theme_classic()+
    xlab("sample")+ylab("peak position")

ggplot(peak2_data) + 
    geom_boxplot(aes(y = value,x=as.factor(L1)),outlier.shape = NA)+
    theme_classic()+
    xlab("sample")+ylab("peak position")

data_summary <- function(x) {
   m <- mean(x)
   ymin <- m-sd(x)
   ymax <- m+sd(x)
   return(c(y=m,ymin=ymin,ymax=ymax))
}

ggplot(peak1_data,aes(y = value,x=as.factor(L1))) + 
    geom_violin(draw_quantiles = NULL,trim=FALSE,bw=0.075)+
    theme_classic()+
    xlab("sample")+ylab("peak position")+
    stat_summary(fun.data=data_summary)

ggsave("bootstrap_precursorpeaks_peak1_violin.pdf")
## Saving 7 x 5 in image
ggplot(peak2_data,aes(y = value,x=as.factor(L1))) + 
    geom_violin(draw_quantiles = NULL, trim=FALSE,bw=0.075)+
    theme_classic()+
    xlab("sample")+ylab("peak position")+
    stat_summary(fun.data=data_summary)

ggsave("bootstrap_precursorpeaks_peak2_violin.pdf")
## Saving 7 x 5 in image
ggplot(peak1_data,aes(y = value,x=as.factor(L1))) + 
    geom_boxplot(outlier.shape = NA)+
    theme_classic()+
    xlab("sample")+ylab("peak position")

ggsave("bootstrap_precursorpeaks_peak1_boxplot.pdf")
## Saving 7 x 5 in image
ggplot(peak2_data,aes(y = value,x=as.factor(L1))) + 
    geom_boxplot(outlier.shape = NA)+
    theme_classic()+
    xlab("sample")+ylab("peak position")

ggsave("bootstrap_precursorpeaks_peak2_boxplot.pdf")
## Saving 7 x 5 in image
lendist_npchr_pairs<-function(dflist,filename){
   
  length.mlt <- melt(lapply(dflist,function(df){return(df$V4)}))
  length.mlt$L1 <- as.numeric(length.mlt$L1)
  length.mlt$sample <- names[length.mlt$L1]
  length.mlt$L1 <- as.factor(length.mlt$L1)
  
  length.mlt<-length.mlt[which(length.mlt$L1 %in% c(14:29)),]
  length.mlt$L1<-factor(length.mlt$L1,levels=c(14,18,15,19,16,20,17,21,22,26,23,27,24,28,25,29))
  
  print(ggplot(length.mlt) + 
    geom_boxplot(aes(y = value,x=L1),outlier.shape = NA)+
    theme_classic()+
    xlab("sample")+ylab("precursor length")+
    scale_x_discrete(labels=names[c(14,18,15,19,16,20,17,21,22,26,23,27,24,28,25,29)])+
    theme(axis.text.x= element_text(angle=45,vjust=1,hjust=1)))
    ggsave("npchr_pairs_lendist.pdf")
  
    print(ggplot(length.mlt) + 
    geom_violin(aes(y = value,x=L1,fill=L1),outlier.shape = NA)+
    theme_classic()+
    xlab("sample")+ylab("precursor length")+
    scale_x_discrete(labels=names[c(14,18,15,19,16,20,17,21,22,26,23,27,24,28,25,29)])+
    theme(axis.text.x= element_text(angle=45,vjust=1,hjust=1)))
    ggsave("npchr_pairs_lendist_violin.pdf")

}

lendist_npchr_pairs(motifdep_piRNA_precursors)
## Saving 7 x 5 in image
## Warning: Ignoring unknown parameters: outlier.shape

## Saving 7 x 5 in image

piRNA_long_fraction_numseqs<- data.frame(fraction=unlist(lapply(motifdep_piRNA_precursors, function(df){
  return(nrow(df[which(df$V4>41),])/nrow(df))})),names=factor(names,levels=names))
ggplot(piRNA_long_fraction_numseqs)+geom_col(aes(y=fraction,x=names))+
    theme_classic()+
    theme(axis.text.x = element_text(angle = 45, hjust = 1))+
    ylab("fraction of precursors>41nt")

pausing_signal_strength<-read.table("/Users/ab6415/Bioinformatics/Analysis/INTEGRATOR_PAPER_ANALYSES/Reference_pausing_signal_strength/all_pis_typeI_pausingscores.bed",header=TRUE)
pausing_signal_strength<-pausing_signal_strength[,c(4,7:12)]

pausing_signal_strength$tm_upstream_bins<-factor(pausing_signal_strength$tm_upstream_bins,levels =c("(-316,-60.4]","(-60.4,-18.9]","(-18.9,16.5]","(16.5,59.1]","(59.1,307]"))
pausing_signal_strength$tm_downstream_bins<-factor(pausing_signal_strength$tm_downstream_bins,levels =c("(-713,-94.8]","(-94.8,-18.3]","(-18.3,39.8]","(39.8,101]","(101,412]"))
pausing_signal_strength$tm_total_bins<-factor(pausing_signal_strength$tm_total_bins,levels =c("(-620,-108]","(-108,-26.4]","(-26.4,40.1]","(40.1,112]","(112,486]"))

motifdep_piRNA_precursors_pausingbins<-lapply(motifdep_piRNA_precursors,function(df){return(merge(df,pausing_signal_strength,by.x="V12",by.y="V4",all.x=TRUE))})


histograms_pausing_bins<-function(df,title,filename){
  
  df_total<-df[which(df$tm_total_bins=="(-620,-108]" | df$tm_total_bins=="(112,486]"),]
  
  df_total_low<-df[which(df$tm_total_bins=="(-620,-108]"),"V4"]
  df_total_high<-df[which(df$tm_total_bins=="(112,486]"),"V4"]
  
  kernel_1<-density(df_total_low)
  max_1<-kernel_1$x[which(diff(sign(diff(kernel_1$y)))==-2)+1]
  kernel_2<-density(df_total_high)
  max_2<-kernel_2$x[which(diff(sign(diff(kernel_2$y)))==-2)+1]
  linedata<-data.frame(xvar1=kernel_1$x,xvar2=kernel_2$x,yvar1=kernel_1$y,yvar2=kernel_2$y)
  print(max_1)
  print(max_2)
  
ggplot(df_total, aes(V4, fill=tm_total_bins)) + 
    geom_histogram(aes(y = ..count..), position = 'identity',binwidth=1,alpha=0.5)+
    theme_classic()+
    scale_fill_manual(values = c("orange","lightblue"))+
    ggtitle(title)+xlab("precursor length")+ylab("fraction of sequences")+
    xlim(18,75)+
    geom_line(data=linedata,aes(x=linedata$xvar1,y=linedata$yvar1*length(df_total_low),fill=NA))+
    geom_line(data=linedata,aes(x=linedata$xvar2,y=linedata$yvar2*length(df_total_high),fill=NA))
    ggsave(paste(paste("/Users/ab6415/Bioinformatics/Analysis/INTEGRATOR_PAPER_ANALYSES/Fig1_Fig3_precursors_analysis/length/pausing_data/hist_",filename,sep=""),"pdf",sep="."))
    
  ggplot(df, aes(y=V4, fill=tm_total_bins))+geom_boxplot()+ggtitle(paste("total Tm",title,sep=" "))+ylab("precursor length")+theme_classic()
  ggsave(paste(paste("/Users/ab6415/Bioinformatics/Analysis/INTEGRATOR_PAPER_ANALYSES/Fig1_Fig3_precursors_analysis/length/pausing_data/boxplots_",filename,sep=""),".pdf",sep="."))

  # df_downstream<-df[which(df$tm_downstream_bins=="(-713,-94.8]" | df$tm_downstream_bins=="(101,412]"),]
  # print(ggplot(df_downstream, aes(V4, fill=tm_downstream_bins)) + 
  #   geom_histogram(aes(y = ..count..), position = 'identity',binwidth=1,alpha=0.2)+
  #   theme_classic()+
  #   scale_fill_manual(values=c("orange","lightblue"))+
  #   ggtitle(title)+xlab("precursor length")+
  #   xlim(18,75))
  # print(ggplot(df, aes(y=V4, fill=tm_downstream_bins))+geom_boxplot()+ggtitle(paste("downstream Tm",title,sep=" "))+ylab("precursor length"))
  # 
}

for (i in seq(1:length(motifdep_piRNA_precursors_pausingbins))){
  histograms_pausing_bins(motifdep_piRNA_precursors_pausingbins[[i]],names[i],names[i])
}
## [1] 29.20842 49.98472
## [1] 28.30322 45.43078 49.97331
## Warning: Ignoring unknown aesthetics: fill

## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 47 rows containing missing values (geom_path).
## Warning: Removed 41 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.22416 49.79038
## [1] 28.62786 49.77801
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 70 rows containing missing values (geom_path).
## Warning: Removed 67 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 28.76118 49.94928
## [1] 27.56227 50.00020
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 50 rows containing missing values (geom_path).
## Warning: Removed 44 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.47929 49.75912
## [1] 28.30145 49.86576
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 67 rows containing missing values (geom_path).
## Warning: Removed 61 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 28.44329 45.30772 49.97069
## [1] 27.63873 43.44998 49.99510
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 50 rows containing missing values (geom_path).
## Warning: Removed 38 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.84127 49.62806
## [1] 28.67560 49.80281
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 73 rows containing missing values (geom_path).
## Warning: Removed 68 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1]  28.96619  48.70188  54.82675  67.07648  79.77992  93.16389  94.07128
## [8] 112.89958 124.92247
## [1] 28.21431 48.61489 64.37898 70.75416
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 261 rows containing missing values (geom_path).
## Warning: Removed 27 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.42551 52.56003
## [1] 29.22037
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 140 rows containing missing values (geom_path).
## Warning: Removed 108 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.62514 56.24207 86.01977
## [1] 28.89969
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 170 rows containing missing values (geom_path).
## Warning: Removed 72 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 26.92623 49.59538
## [1] 28.09066 49.95134
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 63 rows containing missing values (geom_path).
## Warning: Removed 49 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 25.77480 49.38496
## [1] 25.32845 44.52686 49.77013
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 68 rows containing missing values (geom_path).
## Warning: Removed 58 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 28.84771 49.45686
## [1] 28.16535 49.95349
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 74 rows containing missing values (geom_path).
## Warning: Removed 60 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.57213 48.97514
## [1] 21.9641
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 95 rows containing missing values (geom_path).
## Warning: Removed 83 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.08725 47.46381 54.01768 59.67201 66.61141
## [1] 27.84261 45.56285 53.01978
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 68 rows containing missing values (geom_path).
## Warning: Removed 38 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.10063 47.82766 58.30921
## [1] 27.72296 46.83401 51.43481 67.95053
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 104 rows containing missing values (geom_path).
## Warning: Removed 44 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 30.49882
## [1] 27.60675 51.74591
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 176 rows containing missing values (geom_path).
## Warning: Removed 47 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.91583
## [1] 28.56268 47.50758
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 146 rows containing missing values (geom_path).
## Warning: Removed 76 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 26.36999 49.58039
## [1] 23.93348 26.46660 48.66155 58.67341 70.01214
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 114 rows containing missing values (geom_path).
## Warning: Removed 39 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 28.22001 47.46210
## [1] 26.40161
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 126 rows containing missing values (geom_path).
## Warning: Removed 52 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 30.40159 51.32210
## [1] 28.28734 51.52082
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 128 rows containing missing values (geom_path).
## Warning: Removed 73 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 31.32814 58.21260
## [1] 35.73515 46.42201
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 185 rows containing missing values (geom_path).
## Warning: Removed 150 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 28.18847 49.56380 66.94906
## [1] 27.11976 42.60617
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 112 rows containing missing values (geom_path).
## Warning: Removed 51 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 27.58163 53.80045
## [1] 28.21500 44.29335 51.71413
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 148 rows containing missing values (geom_path).
## Warning: Removed 58 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 28.97286 51.73719 66.54099
## [1] 20.21115 28.52669 43.42987 50.44948 63.73275
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 104 rows containing missing values (geom_path).
## Warning: Removed 43 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 29.36324
## [1] 29.80429 47.11613
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 152 rows containing missing values (geom_path).
## Warning: Removed 100 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 27.82187 47.68040
## [1] 25.94531 47.03497 71.70123
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 102 rows containing missing values (geom_path).
## Warning: Removed 50 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 30.20706 49.22927
## [1] 28.11004
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 168 rows containing missing values (geom_path).
## Warning: Removed 121 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 30.20965 50.76486
## [1] 20.33493 28.69156 50.44338
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 104 rows containing missing values (geom_path).
## Warning: Removed 49 rows containing missing values (geom_path).
## Saving 7 x 5 in image
## [1] 36.03459 51.98885
## [1] 29.06670 52.06998
## Warning: Ignoring unknown aesthetics: fill
## Warning: Ignoring unknown aesthetics: fill
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 122 rows containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_path).
## Saving 7 x 5 in image